Learning resources

Proof of the least-squares solution:

https://math.stackexchange.com/questions/131590/derivation-of-the-formula-for-ordinary-least-squares-linear-regression

Linear regression using the linear model (`lm`) function

How to obtain the coefficients (intercept and slope) of a linear model

library(UsingR)
library(ggplot2)

# Generate fake data from a known linear model so we can check that
# lm() recovers the true parameters: outcome = beta*observed + intercept + noise.
set.seed(1234)          # fix the RNG so the results below are reproducible
beta <- 2               # true slope
intercept <- 10         # true intercept
n <- 50                 # sample size
m <- 10                 # mean of the additive noise
s <- 10                 # sd of the additive noise
noise <- rnorm(n, mean = m, sd = s)
# use n here (the original hard-coded 50, silently decoupling it from n)
observed <- runif(n = n, min = 1, max = 100)
# intercept is recycled across the vector; no need for rep(intercept, n)
outcome <- beta * observed + intercept + noise
# build the data frame directly instead of as.data.frame(cbind(...)),
# which round-trips through a matrix
fake <- data.frame(observed, outcome)

# Scatter plot of the raw fake data
plot(fake$observed, fake$outcome,
     pch = 21, cex = 1.1, frame = FALSE,
     col = "black", bg = "lightblue",
     xlab = "observed (units)",
     ylab = "outcome (units)")


# Fit the simple linear regression outcome ~ observed; the estimated
# coefficients (intercept and slope) are inspected below.
fit <- lm(outcome ~ observed, data = fake)


# Overlay the fitted regression line and the fitted values on the scatter plot
abline(fit, lwd = 2)
fitted_y <- predict(fit)
points(fake$observed, fitted_y, col = "red", pch = 19)

# Examine the coefficients and the generated model.
# The pasted console output below was not comment-prefixed, so sourcing
# this file raised syntax errors; it is now `##`-commented like the
# other output blocks in this file.
coef(fit)
## (Intercept)    observed 
##   14.528576    2.017873 
fit
## 
## Call:
## lm(formula = outcome ~ observed, data = fake)
## 
## Coefficients:
## (Intercept)     observed  
##      14.529        2.018  

Regression on centred data gives an intercept equal to the y value at the mean value of x.

# The I() notation is a shortcut that lets an expression be evaluated
# in-line within the lm() call, e.g.:
#   fit2 <- lm(outcome ~ I(observed - mean(observed)), data = fake)
# Here the predictor is centred explicitly instead.
observed.c <- observed - mean(observed)
# Refit on the centred predictor: the slope is unchanged (2.017873, as in
# `fit`), but the intercept becomes the fitted outcome at mean(observed).
fit2 <- lm(outcome ~ observed.c, data = fake)
coef(fit2)
## (Intercept)  observed.c 
##  120.756980    2.017873
# Scatter the outcome against the centred predictor
plot(observed.c, fake$outcome,
     pch = 21, cex = 1.1, frame = FALSE,
     col = "black", bg = "lightblue",
     xlab = "observed (units)",
     ylab = "outcome (units)")




# Overlay the centred-fit regression line and its fitted values
abline(fit2, lwd = 2)
fitted_y2 <- predict(fit2)
points(observed.c, fitted_y2, col = "red", pch = 19)

# Examine the coefficients of the centred fit: compared with `fit`,
# the slope is identical but the intercept has shifted to the fitted
# outcome at the mean of `observed`.
coef(fit2)
## (Intercept)  observed.c 
##  120.756980    2.017873
fit2
## 
## Call:
## lm(formula = outcome ~ observed.c, data = fake)
## 
## Coefficients:
## (Intercept)   observed.c  
##     120.757        2.018

Making predictions with a model

# x holds the observations for which predictions are wanted
x <- c(10, 50, 90)
# predict() expects new data as a data frame whose column is named after
# the predictor variable used when the model was fitted
newdata <- data.frame(observed = x)
predict(fit, newdata = newdata)
##        1        2        3 
##  34.7073 115.4222 196.1371

Plotting regression lines in ggplot2

https://www.rstudio.com/wp-content/uploads/2015/03/ggplot2-cheatsheet.pdf

# Build the scatter plot and overlay the least-squares line in one chain
# (geom_smooth(method = "lm") fits and draws the regression line itself)
g <- ggplot(fake, aes(x = observed, y = outcome)) +
  xlab("observed (units)") +
  ylab("outcome (units)") +
  geom_point(size = 2, colour = "black", alpha = 0.4) +
  geom_point(size = 1, colour = "blue", alpha = 0.2) +
  geom_smooth(method = "lm", colour = "black")
g

Plotting an interactive regression plot in plotly

https://plot.ly/ggplot2/geom_abline/

# Convert the ggplot object `g` into an interactive plotly widget.
# Note: attaching plotly masks several functions from previously loaded
# packages (see the startup messages preserved below).
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:Hmisc':
## 
##     subplot
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:MASS':
## 
##     select
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
p <- ggplotly(g)
p

Obtaining the residuals of a fit

# Extract the residuals via the accessor (resid() is an alias of residuals())
e <- residuals(fit)
# or read them directly off the fitted lm object
fit$residuals
##           1           2           3           4           5           6 
##  -6.6798441   7.2279911  15.8020713 -18.3647334   9.5081587   9.9378445 
##           7           8           9          10          11          12 
##  -0.5682171  -0.2427242  -0.9616028  -3.5152021  -0.5805001  -4.7086154 
##          13          14          15          16          17          18 
##  -3.9904686   5.8825933  14.6598285   2.7350679  -1.3301476  -4.1523455 
##          19          20          21          22          23          24 
##  -3.1366371  28.2013977   5.4775004  -1.0740435  -0.7117815   8.3820214 
##          25          26          27          28          29          30 
##  -2.3438252  -9.5300548  10.7560202  -5.6734718   4.4228302  -4.4693968 
##          31          32          33          34          35          36 
##  14.7739582  -0.4243626  -1.8663316  -0.3075739 -12.4551867  -7.0503589 
##          37          38          39          40          41          42 
## -17.9537706  -9.0140349   1.3927993  -0.7433233  19.0589422  -6.9733317 
##          43          44          45          46          47          48 
##  -3.6740678   1.7955705  -5.1215069  -5.3418585  -6.9318270  -8.0677362 
##          49          50 
##  -1.5201453  -0.5355678
#